###############
##PLEASE NOTE##
###############

# The following script samples the 24,000 tweets that were crowd-
# coded in the first round. Thus, it reproduces the numerical 
# results of the second paragraph of the section called "Design of 
# content analysis." For the remaining analyses, see the file
# "script_All_remaining_analyses.do." Before running the current 
# script, remember to change the working directory in line 16 and make
# sure the required packages (openxlsx and dplyr) are installed.

remove(list = ls())  # clear the workspace: removes every object listed by ls()
library(openxlsx)  # provides read.xlsx() and write.xlsx()
library(dplyr)  # provides count() and left_join()
setwd("U:/Papers/Degrees of Disrespect/All_data_and_dofiles/Replication files for JOP")  # machine-specific path; the relative file names used below are resolved against it

##########################
##SAMPLING 24,000 TWEETS##
##########################

# Read the full set of tweets from which the coded sample is later taken
All_tweets <- read.xlsx(xlsxFile = "data_All_tweets_from_which_24000_are_sampled.xlsx")

# Print the earliest and latest value of `time` and the total number of
# tweets (see paragraph starting with "As a first step...")
min(All_tweets$time)
max(All_tweets$time)
All_tweets %>% count()

# Keeping only tweets that are not retweets (retweet == 0) and that were
# posted by individual members (type == "member"); all retweets and all
# tweets from other account types are removed.
# which() is used so that rows where either condition evaluates to NA are
# dropped: a bare logical index would turn each such row into an all-NA row
# instead of excluding it.
Relevant_tweets <- All_tweets[
  which(All_tweets$retweet == 0 & All_tweets$type == "member"),
]

# Selecting the 24,000 tweets for the first round of crowdsourced coding.
# The selection is not re-drawn here: the IDs are read from a file and
# attached to the relevant tweets via their shared "ID1" column.
Randomly_selected_tweet_IDs <- read.xlsx(xlsxFile = "data_IDs_of_randomly_sampled_tweets.xlsx")
Relevant_tweets <- Relevant_tweets %>%
  left_join(Randomly_selected_tweet_IDs, by = "ID1")

# Keep only the rows flagged with randomly_selected == 1 (rows without a
# flag, i.e. NA, are dropped by subset()) and save them as a workbook.
Sample_to_be_coded <- Relevant_tweets %>%
  subset(randomly_selected == 1)

write.xlsx(x = Sample_to_be_coded, file = "Sample_to_be_coded.xlsx")